

/*
clean TreasuryDirect auction data
input data comes from scraped TD data

output:
- td_auction_data_clean.dta: combined multiple auction datasets
- td_auction_notesbonds_clean.dta: main dataset of relevant info for notes and bonds

*/


********************************************************************************
********************************************************************************
* clean TreasuryDirect auction data for PH shock regressions
n di "cleaning TreasuryDirect data"
use ../data/input/td_auction_data.dta, clear

* merge and update other auction data
merge 1:1 cusip auctionDate using ../data/input/td_close_release_times.dta, nogen 
merge 1:1 cusip auctionDate using ../data/input/td_competitive_results.dta, update nogen
merge 1:1 cusip auctionDate using ../data/input/td_supplement.dta, update nogen

* note: small differences in somaAccepted in both datasets
* use TreasuryDirect
merge 1:1 cusip issueDate using ../data/input/td_investor_allocations.dta, update nogen

* convert datetime objects to stata daily dates
local datevars announcementDate auctionDate backDatedDate callDate ///
	calledDate datedDate firstInterestPaymentDate frnIndexDeterminationDate ///
	issueDate maturingDate maturityDate originalDatedDate originalIssueDate
foreach var of local datevars {
	replace `var' = dofc(`var')
	format `var' %td
}

* create numeric term length
destring securityTermDayMonth, gen(term_month) ignore("-Month") force
destring securityTermDayMonth, gen(term_day) ignore("-Day") force
destring securityTermWeekYear, gen(term_year) ignore("-Year") force
gen term_num = term_year + term_month/12 if term_day==.
replace term_num = term_day/365 if term_day!=.
drop term_month term_day term_year

* groups: 1, 2, 3, 4, 5, 7, 10, 15, 20, 30
* post-1995 notes and bonds only include: 2, 3, 5, 7, 10, 30
* coarse grouping for bills (<=1 year) 
* use x.5 to properly group reopenings
gen term_group = .
replace term_group = 1 if term_group==. & term_num<=1.5
replace term_group = 2 if term_group==. & term_num<=2.5
replace term_group = 3 if term_group==. & term_num<=3.5
replace term_group = 4 if term_group==. & term_num<=4.5
replace term_group = 5 if term_group==. & term_num<=5.5
replace term_group = 7 if term_group==. & term_num<=7.5
replace term_group = 10 if term_group==. & term_num<=10.5
replace term_group = 15 if term_group==. & term_num<=15.5
replace term_group = 20 if term_group==. & term_num<=20.5
replace term_group = 30 if term_group==. & term_num<=31


* fill in bid to cover for handful of early auctions reporting competitive bids
* create total bids data for "public"
replace noncompetitiveTendered = noncompetitiveAccepted if noncompetitiveTendered==.
gen double publicAccepted = competitiveAccepted + noncompetitiveAccepted
gen double publicTendered = competitiveTendered + noncompetitiveTendered
replace bidToCoverRatio = publicTendered / publicAccepted if bidToCoverRatio==.

* difference between accepted and offering amounts - SOMA, FIMA, etc
* note: doesn't replicate the bid-to-cover ratio
* when SOMA/FIMA are included in offering amount
* but is the closest for older auctions that don't report breakdowns
gen b2c_all = bidToCoverRatio
replace b2c_all = ///
	(totalTendered - (totalAccepted-offeringAmount)) / offeringAmount ///
	if b2c_all==.


* bidder type bid-to-cover and fraction
local bidder_type directBidder indirectBidder primaryDealer noncompetitive
foreach bid of local bidder_type {
	* bid to cover by groups: relative to total offering amount
	gen b2c_`bid' = `bid'Tendered / publicAccepted
	gen frac_`bid' = `bid'Accepted / publicAccepted
}
* other breakdowns (only accepted)
* note: rounding in allocation data; some frac slightly greater than 1
local bidder_type2 depository individuals dealers ///
	pensions investmentFunds foreign other
foreach bid of local bidder_type2 {
	gen double frac_`bid' = `bid'Accepted / publicAccepted
}
* combine small groups into misc
gen double frac_misc = 1 - frac_dealers - frac_investmentFunds - frac_foreign


* high/median/low yields
* "price-based" auctions in early 80s switched high and low rates -- fix
tempvar high
tempvar idx_replace
gen `idx_replace' = highYield < lowYield & auctionFormat=="Price-Based"
gen `high' = highYield
replace highYield = lowYield if `idx_replace'
replace lowYield = `high' if `idx_replace'
drop `high' `idx_replace'

* save
sort auctionDate cusip
save ../data/td_auction_data_clean.dta, replace



********************************************************************************
* create subset of Notes and Bonds, for intraday analysis
use ../data/td_auction_data_clean.dta, clear
* only note and bond (non-callable) auctions, with close and release times
keep if type=="Note" | type=="Bond"
drop if callable==1
drop if missing(close_time) | missing(release_time)

* October 8, 2008: unusual auction announced on same day as close
drop if auctionDate==mdy(10,8,2008) & announcementDate==mdy(10,8,2008)
* June 21, 2019: "small contignecy plan" test auction
drop if auctionDate==mdy(6,12,2019) & announcementDate==mdy(6,12,2019) 
* handful of auctions with identical close times
drop if auctionDate==mdy(10,9,2008)
drop if auctionDate==mdy(9,12,2016)
duplicates tag close_time, generate(assert_no_dups)
assert assert_no_dups==0
drop assert_no_dups

* spreads
gen spread_high_median = highYield - averageMedianYield
gen spread_high_low = highYield - lowYield

* (log) size of auction
gen double log_offering = log(offeringAmount)
gen double log_public_accepted = log(publicAccepted)
gen double log_public_tendered = log(publicTendered)
gen double log_total_tendered = log(totalTendered)

* only keep main variables and save
keep cusip auctionDate type reopening ///
	offeringAmount publicAccepted publicTendered totalTendered highYield ///
	log_offering log_public_accepted log_public_tendered log_total_tendered ///
	term_num term_group b2c_* frac_* spread_* close_time release_time
save ../data/td_auction_notesbonds_clean.dta, replace



